package com.cw.spider.utils;

import com.alibaba.fastjson.JSONObject;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.ProxyConfig;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import org.seimicrawler.xpath.JXDocument;

import java.io.IOException;

/**
 * Helper for fetching a web page with HtmlUnit and exposing it as a
 * {@link JXDocument} so that it can be queried with XPath.
 *
 * @author withqianqian@163.com
 * @create 2020-09-01 9:15
 */
public class HtmlUnitUtil {
    /**
     * Loads {@code url} with a Chrome-emulating HtmlUnit client and wraps the
     * XML serialization of the resulting page in a {@link JXDocument}.
     *
     * <p>CSS processing is disabled, and neither script errors nor failing
     * HTTP status codes raise exceptions. If {@code IPPool.get()} returns an
     * entry, its {@code host} and {@code port} values are used as the HTTP
     * proxy for the request; when the fetch fails, that entry is handed to
     * {@code IPPool.remove} (presumably evicting the proxy from the pool).
     *
     * @param url address of the page to load
     * @return the parsed document, or {@code null} if fetching the page threw
     *         any exception
     */
    public static JXDocument getDoc(String url) {
        // try-with-resources guarantees the client (and the windows and
        // connections it owns) is released on every exit path.
        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

            // IP proxy: route the request through a pooled proxy when one is available.
            JSONObject ip = IPPool.get();
            if (ip != null) {
                ProxyConfig proxyConfig = new ProxyConfig(ip.getString("host"), ip.getIntValue("port"));
                webClient.getOptions().setProxyConfig(proxyConfig);
            }

            HtmlPage page;
            try {
                page = webClient.getPage(url);
            } catch (Exception e) {
                // Best-effort fetch: a failure is reported to the caller as null.
                // Only touch the pool when a proxy was actually in use.
                if (ip != null) {
                    IPPool.remove(ip);
                }
                return null;
            }
            // Serialize while the client is still open, before it is closed on exit.
            return JXDocument.create(page.asXml());
        }
    }
}
